{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Lab 11b - Classwork"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x1 | \n",
" x2 | \n",
" x3 | \n",
" y | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" red | \n",
" 3.0 | \n",
" yes | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" blue | \n",
" 6.0 | \n",
" no | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" blue | \n",
" 8.2 | \n",
" yes | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" red | \n",
" 1.0 | \n",
" no | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" red | \n",
" 5.0 | \n",
" yes | \n",
" 1 | \n",
"
\n",
" \n",
" 5 | \n",
" blue | \n",
" 6.0 | \n",
" yes | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x1 x2 x3 y\n",
"0 red 3.0 yes 0\n",
"1 blue 6.0 no 1\n",
"2 blue 8.2 yes 0\n",
"3 red 1.0 no 0\n",
"4 red 5.0 yes 1\n",
"5 blue 6.0 yes 0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({\"x1\": [\"red\",\"blue\",\"blue\",\"red\",\"red\",\"blue\"], \\\n",
" \"x2\": [3, 6, 8.2, 1, 5,6], \\\n",
" \"x3\": [\"yes\", \"no\", \"yes\",\"no\", \"yes\",\"yes\"], \\\n",
" \"y\": [0,1, 0,0, 1, 0]})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/megan/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: specifying 'categories' or 'ordered' in .astype() is deprecated; pass a CategoricalDtype instead\n",
" \n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x1 | \n",
" x2 | \n",
" x3 | \n",
" y | \n",
" y_predicted | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" red | \n",
" 3.0 | \n",
" yes | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" blue | \n",
" 6.0 | \n",
" no | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" blue | \n",
" 8.2 | \n",
" yes | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" red | \n",
" 1.0 | \n",
" no | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" red | \n",
" 5.0 | \n",
" yes | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 5 | \n",
" blue | \n",
" 6.0 | \n",
" yes | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x1 x2 x3 y y_predicted\n",
"0 red 3.0 yes 0 0\n",
"1 blue 6.0 no 1 0\n",
"2 blue 8.2 yes 0 0\n",
"3 red 1.0 no 0 1\n",
"4 red 5.0 yes 1 1\n",
"5 blue 6.0 yes 0 1"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"y_predicted\"] = [0,0,0,1,1,1]\n",
"df[\"y_predicted\"] = df[\"y_predicted\"].astype(\"category\", categories = [0,1])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" Predicted | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" Actual | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Predicted 0 1\n",
"Actual \n",
"0 2 2\n",
"1 1 1"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"confusion_matrix = pd.crosstab(df[\"y\"], df[\"y_predicted\"], \\\n",
" rownames = [\"Actual\"], colnames = [\"Predicted\"], \\\n",
" dropna = False)\n",
"confusion_matrix"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"true_pos = confusion_matrix[0][0]\n",
"false_pos = confusion_matrix[1][0]\n",
"false_neg = confusion_matrix[0][1]\n",
"true_neg = confusion_matrix[1][1]\n",
"sensitivity = true_pos/(true_pos + false_neg)\n",
"specificity = true_neg/(true_neg + false_pos)\n",
"precision = true_pos/(true_pos + false_pos)\n",
"accuracy = (true_pos + true_neg)/(true_pos + false_pos + false_neg + true_neg)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# of true positives: 2\n",
"# of false positives: 2\n",
"# of false negatives: 1\n",
"# of true negatives: 1\n",
"Sensitivity: 0.6666666666666666\n",
"Specificity: 0.3333333333333333\n",
"Precision: 0.5\n",
"Accuracy: 0.5\n"
]
}
],
"source": [
"print(\"# of true positives:\",true_pos)\n",
"print(\"# of false positives:\",false_pos)\n",
"print(\"# of false negatives:\",false_neg)\n",
"print(\"# of true negatives:\",true_neg)\n",
"print(\"Sensitivity:\",sensitivity)\n",
"print(\"Specificity:\",specificity)\n",
"print(\"Precision:\",precision)\n",
"print(\"Accuracy:\",accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}